"""TXT标准规范模板配置文件

定义系统输出的标准化TXT文本模板和格式化函数
"""

# Standard TXT output template.
#
# The text contains ``{name}`` replacement fields (presumably filled in with
# ``str.format`` -- the rendering code is not in this module; TODO confirm).
# Every field name used here has an entry in DEFAULT_VALUES, and every
# "label: value" line has a corresponding regex in TXT_PARSING_PATTERNS, so
# the three structures have to be kept in sync when a field is added or
# renamed.
#
# Notes:
# - ``{video_id}`` and ``{timestamp}`` each appear twice: once in the header
#   lines and once in the metadata section.
# - The literal begins with a newline, so the rendered text starts with a
#   blank line.
# - Lines starting with "#" inside the literal are part of the emitted text,
#   not Python comments.
TXT_TEMPLATE = """
# 视频编码分析结果
# 生成时间: {timestamp}
# 视频ID: {video_id}

## 元数据字段
视频ID: {video_id}
分析时间: {timestamp}
分析状态: {status}
数据版本: {data_version}
处理阶段: {processing_stage}

## 编码字段（来自编码类目表）
### 场景设定
场景设定_选项: {scene_option}
场景设定_置信度: {scene_confidence}
场景设定_关键词匹配: {scene_keywords}
场景设定_分析依据: {scene_analysis}

### 声音素材  
声音素材_选项: {audio_option}
声音素材_置信度: {audio_confidence}
声音素材_分析依据: {audio_analysis}

### 主题类型
主题类型_选项: {theme_option}
主题类型_置信度: {theme_confidence}
主题类型_关键词匹配: {theme_keywords}
主题类型_分析依据: {theme_analysis}

### 视频情感倾向
视频情感倾向_选项: {emotion_option}
视频情感倾向_置信度: {emotion_confidence}
视频情感倾向_分析依据: {emotion_analysis}

### 话题标签
话题标签_选项: {topic_option}
话题标签_置信度: {topic_confidence}
话题标签_分析依据: {topic_analysis}

### 作者昵称类型
作者昵称类型_选项: {nickname_type_option}
作者昵称类型_置信度: {nickname_type_confidence}
作者昵称类型_分析依据: {nickname_type_analysis}

### 作者简介内容
作者简介内容_选项: {bio_content_option}
作者简介内容_置信度: {bio_content_confidence}
作者简介内容_分析依据: {bio_content_analysis}

## 非编码字段（来自样本数据表）
达人昵称: {nickname}
达人简介: {bio}
粉丝数: {fans_count}
获赞: {likes_received}
达人UID: {uid}
抖音号: {douyin_id}
视频描述: {video_description}
大家都在搜: {search_terms}
发布时间: {publish_time}
视频时长: {video_duration}
点赞量: {like_count}
收藏量: {collect_count}
评论量: {comment_count}
分享量: {share_count}
达人链接: {creator_link}
视频文件: {video_file}
音频文件: {audio_file}
视频链接: {video_link}

## 分析质量评估
总体置信度: {overall_confidence}
待补充字段数: {missing_fields_count}
分析完整性: {completeness_percentage}
质量评级: {quality_rating}
"""

# Default value for every replacement field of the TXT template.
#
# All fields start out as the same "pending" placeholder string; the keys are
# listed in template order so iteration order follows the rendered document.
DEFAULT_VALUES = dict.fromkeys(
    (
        # Metadata
        "timestamp",
        "video_id",
        "status",
        "data_version",
        "processing_stage",
        # Encoded fields: scene setting
        "scene_option",
        "scene_confidence",
        "scene_keywords",
        "scene_analysis",
        # Encoded fields: audio material
        "audio_option",
        "audio_confidence",
        "audio_analysis",
        # Encoded fields: theme type
        "theme_option",
        "theme_confidence",
        "theme_keywords",
        "theme_analysis",
        # Encoded fields: emotional tendency
        "emotion_option",
        "emotion_confidence",
        "emotion_analysis",
        # Encoded fields: topic tags
        "topic_option",
        "topic_confidence",
        "topic_analysis",
        # Encoded fields: author nickname type
        "nickname_type_option",
        "nickname_type_confidence",
        "nickname_type_analysis",
        # Encoded fields: author bio content
        "bio_content_option",
        "bio_content_confidence",
        "bio_content_analysis",
        # Non-encoded fields
        "nickname",
        "bio",
        "fans_count",
        "likes_received",
        "uid",
        "douyin_id",
        "video_description",
        "search_terms",
        "publish_time",
        "video_duration",
        "like_count",
        "collect_count",
        "comment_count",
        "share_count",
        "creator_link",
        "video_file",
        "audio_file",
        "video_link",
        # Quality assessment
        "overall_confidence",
        "missing_fields_count",
        "completeness_percentage",
        "quality_rating",
    ),
    "待AI推断",
)
# data_version is the only field with a concrete default. Assigning to an
# already-present key keeps its position in the dict's iteration order.
DEFAULT_VALUES["data_version"] = "v1.0"

# Regular expressions for parsing a rendered TXT file back into fields.
#
# The nesting mirrors the sections of the template: metadata, encoding
# fields (one sub-dict per category), non-encoding fields and quality
# assessment.  Each value is a regex source string with exactly one capture
# group holding the field value.  The patterns are unanchored.
def _line_pattern(label):
    """Return the regex source matching a ``<label>: <value>`` line.

    The value is captured in group 1 and must sit on the same line as the
    label.  ``[^\\S\\n]*`` skips whitespace other than a newline, and the
    capture has to start with a non-whitespace character.

    The previous form, ``\\s*(.+)``, let ``\\s*`` run across the line break when
    a value was empty, so e.g. an empty "达人简介: " line captured the
    following "粉丝数: ..." line as its value.  With this pattern an empty
    value simply does not match.
    """
    return label + r":[^\S\n]*(\S.*)"


TXT_PARSING_PATTERNS = {
    "metadata": {
        "video_id": _line_pattern("视频ID"),
        "timestamp": _line_pattern("分析时间"),
        "status": _line_pattern("分析状态"),
        "data_version": _line_pattern("数据版本"),
        "processing_stage": _line_pattern("处理阶段"),
    },
    "encoding_fields": {
        "scene": {
            "option": _line_pattern("场景设定_选项"),
            "confidence": _line_pattern("场景设定_置信度"),
            "keywords": _line_pattern("场景设定_关键词匹配"),
            "analysis": _line_pattern("场景设定_分析依据"),
        },
        "audio": {
            "option": _line_pattern("声音素材_选项"),
            "confidence": _line_pattern("声音素材_置信度"),
            "analysis": _line_pattern("声音素材_分析依据"),
        },
        "theme": {
            "option": _line_pattern("主题类型_选项"),
            "confidence": _line_pattern("主题类型_置信度"),
            "keywords": _line_pattern("主题类型_关键词匹配"),
            "analysis": _line_pattern("主题类型_分析依据"),
        },
        "emotion": {
            "option": _line_pattern("视频情感倾向_选项"),
            "confidence": _line_pattern("视频情感倾向_置信度"),
            "analysis": _line_pattern("视频情感倾向_分析依据"),
        },
        "topic": {
            "option": _line_pattern("话题标签_选项"),
            "confidence": _line_pattern("话题标签_置信度"),
            "analysis": _line_pattern("话题标签_分析依据"),
        },
        "nickname_type": {
            "option": _line_pattern("作者昵称类型_选项"),
            "confidence": _line_pattern("作者昵称类型_置信度"),
            "analysis": _line_pattern("作者昵称类型_分析依据"),
        },
        "bio_content": {
            "option": _line_pattern("作者简介内容_选项"),
            "confidence": _line_pattern("作者简介内容_置信度"),
            "analysis": _line_pattern("作者简介内容_分析依据"),
        },
    },
    "non_encoding_fields": {
        "nickname": _line_pattern("达人昵称"),
        "bio": _line_pattern("达人简介"),
        "fans_count": _line_pattern("粉丝数"),
        "likes_received": _line_pattern("获赞"),
        "uid": _line_pattern("达人UID"),
        "douyin_id": _line_pattern("抖音号"),
        "video_description": _line_pattern("视频描述"),
        "search_terms": _line_pattern("大家都在搜"),
        "publish_time": _line_pattern("发布时间"),
        "video_duration": _line_pattern("视频时长"),
        "like_count": _line_pattern("点赞量"),
        "collect_count": _line_pattern("收藏量"),
        "comment_count": _line_pattern("评论量"),
        "share_count": _line_pattern("分享量"),
        "creator_link": _line_pattern("达人链接"),
        "video_file": _line_pattern("视频文件"),
        "audio_file": _line_pattern("音频文件"),
        "video_link": _line_pattern("视频链接"),
    },
    "quality_assessment": {
        "overall_confidence": _line_pattern("总体置信度"),
        "missing_fields_count": _line_pattern("待补充字段数"),
        "completeness_percentage": _line_pattern("分析完整性"),
        "quality_rating": _line_pattern("质量评级"),
    },
}

# Formatter callables, keyed by the kind of value they render.
def _format_keywords_list(keywords):
    """Render a list as ``[a, b, c]``; pass any other value through ``str``.

    Items are converted with ``str`` before joining, so a list containing
    non-string entries (numbers, ``None``) no longer raises ``TypeError``.
    """
    if isinstance(keywords, list):
        return f"[{', '.join(map(str, keywords))}]"
    return str(keywords)


def _format_confidence_value(value):
    """Render an int/float with two decimals; pass anything else through ``str``."""
    if isinstance(value, (int, float)):
        return f"{value:.2f}"
    return str(value)


def _format_completeness_percent(value):
    """Render an int/float as ``N.N%``; pass anything else through ``str``."""
    if isinstance(value, (int, float)):
        return f"{value:.1f}%"
    return str(value)


def _format_timestamp(value):
    """Return ``value`` unchanged.

    The earlier implementation compared the value against the "待AI推断"
    placeholder but returned ``value`` from both branches, so it was already
    an identity function; the redundant comparison has been dropped.
    """
    return value


FORMATTERS = {
    "keywords_list": _format_keywords_list,
    "confidence_value": _format_confidence_value,
    "completeness_percent": _format_completeness_percent,
    "timestamp": _format_timestamp,
}